# Load necessary libraries
library(DBI)
library(RSQLite)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
# Connect to the SQLite database
conn <- dbConnect(SQLite(), "/Users/asdishalla/Desktop/capstone-the-north/data/rotten_tomatoes.db")

### Query for Top 10 Tomatometer-Rated Movies
query_top10 <- "
SELECT movie_title, tomatometer_rating, audience_rating, directors, actors, original_release_date
FROM rotten_tomatoes_movies
WHERE actors LIKE '%Kate Winslet%' OR actors LIKE '%Leonardo DiCaprio%'
ORDER BY tomatometer_rating DESC
LIMIT 10
"
top10_movies <- dbGetQuery(conn, query_top10)

# Extract unique directors for Tomatometer
directors <- unique(top10_movies$directors)

### Query for All Movies by These Directors
query_all_movies <- sprintf("
SELECT movie_title, tomatometer_rating, audience_rating, directors, actors, original_release_date
FROM rotten_tomatoes_movies
WHERE directors IN (%s)
", paste(sprintf("'%s'", directors), collapse = ", "))
all_movies <- dbGetQuery(conn, query_all_movies)

### Query for Top 10 Audience-Rated Movies
query_top10_audience <- "
SELECT movie_title, tomatometer_rating, audience_rating, directors, actors, original_release_date
FROM rotten_tomatoes_movies
WHERE actors LIKE '%Kate Winslet%' OR actors LIKE '%Leonardo DiCaprio%'
ORDER BY audience_rating DESC
LIMIT 10
"
top10_audience_movies <- dbGetQuery(conn, query_top10_audience)

# Extract unique directors for Audience
directors_audience <- unique(top10_audience_movies$directors)

### Query for All Movies by These Directors
query_all_audience_movies <- sprintf("
SELECT movie_title, tomatometer_rating, audience_rating, directors, actors, original_release_date
FROM rotten_tomatoes_movies
WHERE directors IN (%s)
", paste(sprintf("'%s'", directors_audience), collapse = ", "))
all_audience_movies <- dbGetQuery(conn, query_all_audience_movies)

### Cleanup: Disconnect from the Database
dbDisconnect(conn)
# Add actor category, average ratings, and release year for Tomatometer movies
all_movies <- all_movies %>%
  mutate(
    actor_category = case_when(
      grepl("Kate Winslet", actors) & grepl("Leonardo DiCaprio", actors) ~ "Bæði",
      grepl("Kate Winslet", actors) ~ "Kate",
      grepl("Leonardo DiCaprio", actors) ~ "Leo",
      TRUE ~ "None"
    ),
    avg_tomatometer = mean(tomatometer_rating, na.rm = TRUE),
    original_release_year = year(as.Date(original_release_date)),
    directors = gsub(" ", "\n", directors) # Format director names
  )


# Create the Tomatometer Rating Graph
p <- ggplot(all_movies, aes(x = directors, y = tomatometer_rating)) +
  geom_boxplot(
    aes(text = paste("Leikstjóri:", directors,
                     "<br>Meðal Tomatometer einkunn:", round(avg_tomatometer, 1))),
    fill = "blue", outlier.shape = NA
  ) +
  geom_point(
    data = subset(all_movies, actor_category != "None"),
    aes(color = actor_category, 
        text = paste("Mynd:", movie_title, 
                     "<br>Útgáfuár:", original_release_year, 
                     "<br>Tomatometer einkunn:", tomatometer_rating)),
    size = 3
  ) +
  labs(title = "Tomatometer einkunnadreifing leikstjóra topp 10 Kate/Leo mynda",
       x = NULL, 
       y = "Tomatometer einkunn",
       color = NULL) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) +
  scale_color_manual(values = c("Bæði" = "gold", "Leo" = "skyblue", "Kate" = "lightpink"))
## Warning in geom_boxplot(aes(text = paste("Leikstjóri:", directors, "<br>Meðal
## Tomatometer einkunn:", : Ignoring unknown aesthetics: text
## Warning in geom_point(data = subset(all_movies, actor_category != "None"), :
## Ignoring unknown aesthetics: text
# Convert to interactive Plotly graph
interactive_plot <- ggplotly(p, tooltip = "text")
interactive_plot
# Add actor category, average ratings, and release year for Audience movies
all_audience_movies <- all_audience_movies %>%
  mutate(
    actor_category = case_when(
      grepl("Kate Winslet", actors) & grepl("Leonardo DiCaprio", actors) ~ "Bæði",
      grepl("Kate Winslet", actors) ~ "Kate",
      grepl("Leonardo DiCaprio", actors) ~ "Leo",
      TRUE ~ "None"
    ),
    avg_audience_rating = mean(audience_rating, na.rm = TRUE),
    original_release_year = year(as.Date(original_release_date)),
    directors = gsub(" ", "\n", directors) # Format director names
  )

# Create the Audience Rating Graph
p_audience <- ggplot(all_audience_movies, aes(x = directors, y = audience_rating)) +
  geom_boxplot(
    aes(text = paste("Leikstjóri:", directors,
                     "<br>Meðal Audience einkunn:", round(avg_audience_rating, 1))),
    fill = "green", outlier.shape = NA
  ) +
  geom_point(
    data = subset(all_audience_movies, actor_category != "None"),
    aes(color = actor_category, 
        text = paste("Mynd:", movie_title, 
                     "<br>Útgáfuár:", original_release_year, 
                     "<br>Audience einkunn:", audience_rating)),
    size = 3
  ) +
  labs(title = "Audience einkunnadreifing leikstjóra topp 10 Kate/Leo mynda",
       x = NULL, 
       y = "Audience einkunn",
       color = NULL) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5)) +
  scale_color_manual(values = c("Bæði" = "gold", "Leo" = "skyblue", "Kate" = "lightpink"))
## Warning in geom_boxplot(aes(text = paste("Leikstjóri:", directors, "<br>Meðal
## Audience einkunn:", : Ignoring unknown aesthetics: text
## Warning in geom_point(data = subset(all_audience_movies, actor_category != :
## Ignoring unknown aesthetics: text
# Convert to interactive Plotly graph
interactive_plot_audience <- ggplotly(p_audience, tooltip = "text")
interactive_plot_audience